
#=============================================================================#
#
# This R file contains the code used to clean the Study 1 dataset
# 
#=============================================================================#

# Initial settings ------------------------------------------------------------

# Remove every (non-hidden) object from the current workspace so that nothing
# left over from an earlier session is picked up by the steps below.
rm(list = ls(all.names = FALSE))
# Report the working directory; the relative "data/..." paths used later in
# this script are resolved against it.
getwd()

# Install and load all necessary packages with the ipak function:
# i.e., check to see if packages are installed.
# Install them if they are not,
# then load them into the R session.
#
# Args:
#   pkg: character vector of package names.
#
# Returns:
#   A logical vector with one element per entry of `pkg` (named by package),
#   TRUE for every package that was attached. An empty `pkg` yields an empty
#   logical vector.
#
# Raises:
#   An error naming the packages that could not be attached, so the script
#   stops here instead of failing later on a missing function.
ipak <- function(pkg) {
  # system.file() returns "" when a package is not installed; this avoids
  # building the full installed.packages() matrix just to test for presence.
  is_installed <- vapply(
    pkg,
    function(p) nzchar(system.file(package = p)),
    logical(1)
  )
  new.pkg <- pkg[!is_installed]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }

  # vapply() (unlike sapply()) always returns a logical vector, also for
  # zero-length input.
  loaded <- vapply(pkg, require, logical(1), character.only = TRUE)
  if (!all(loaded)) {
    stop(
      "Could not load package(s): ",
      paste(pkg[!loaded], collapse = ", "),
      call. = FALSE
    )
  }
  loaded
}

# Packages used in this script: readxl (read_excel), dplyr (pipe and data
# verbs), tidyr (replace_na).
packages <- c(
  "readxl",
  "dplyr",
  "tidyr"
)
ipak(pkg = packages)


# Data wrangling --------------------------------------------------------------

## Import data ----------------------------------------------------------------
# note: dataset from Godefroidt (2023) but restricted to
#         - Jihadist attacks
#         - responses of Western citizens
# and updated to include more time points

df <- read_excel(
  path = "data/update-metaanalysis-data.xlsx",
  skip = 2,         # skip the first two rows of the sheet
  col_names = TRUE  # the first row read after skipping holds the column names
)


## Clean data -----------------------------------------------------------------

### 1. subset the data according to additional inclusion criteria -------------

# Extra inclusion criterion 1: studies on Islamist terrorism only,
# i.e. keep the rows where Type equals 1.
df <- filter(df, Type == 1)

# subset to Western countries/population
# extra criterion 2

# list the country labels present in the data (for visual inspection)
unique(sort(df$Country))

# Country labels, spelled exactly as in the data, that count as Western.
# Multi-country samples are listed as their own labels.
western_countries <- c(
  "Australia",
  "Austria",
  "Belgium",
  "Belgium, Czech Republic, Germany, Finland, and Netherlands",
  "Canada",
  "Czech Republic",
  "Denmark",
  "Estonia",
  "EU",
  "Finland",
  "France",
  "Germany",
  "Germany, Netherlands, and Sweden",
  "Ireland",
  "Italy",
  "Luxembourg",
  "Netherlands",
  "New Zealand",
  "Northern Ireland",
  "Norway",
  "Poland",
  "Portugal",
  "Romania",
  "Slovakia",
  "Slovenia",
  "Spain",
  "Sweden",
  "Switzerland",
  "US, UK, and Australia",
  "UK",
  "US"
)

# Build the whole column in one step. %in% never returns NA, so a missing
# Country is classified as "Non-Western" (same outcome as before), and no
# subset-assignment into a not-yet-existing column is needed.
df$Country2 <- as.factor(
  ifelse(df$Country %in% western_countries, "Western", "Non-Western")
)
table(df$Country2)
df <- df %>% filter(Country2 == "Western")

# dummy: 1 when Country is exactly "US" (multi-country samples that include
# the US are coded 0), 0 otherwise
df$US <- as.factor(ifelse(df$Country == "US", 1, 0))

# Sanity check on the two extra selection criteria: list the distinct attacks
# that remain. They should all be Islamist attacks conducted in Western
# countries (or unspecified).
df$ExactAttack_2 %>%
  unique() %>%
  sort()

# Remove the StudyYear outlier (1985). Rows with a missing StudyYear are
# kept explicitly, because filter() would otherwise drop them.
df <- df %>%
  filter(is.na(StudyYear) | StudyYear != "1985")



### 2. create necessary variables ---------------------------------------------

#### 2.1. control variables already included in the dataset ------------------- 
# casualty rate: one 0/1 factor per CasualtiesCat code
# (0, 1, 2, and -99 for "not stated")
df <- df %>%
  mutate(
    CasualtiesLess10    = as.factor(ifelse(CasualtiesCat == 0, 1, 0)),
    Casualties10to100   = as.factor(ifelse(CasualtiesCat == 1, 1, 0)),
    Casualties100plus   = as.factor(ifelse(CasualtiesCat == 2, 1, 0)),
    CasualtiesNotStated = as.factor(ifelse(CasualtiesCat == -99, 1, 0))
  )

# research design
## panel studies (code 4) are counted as natural experiments (code 2, pre/post),
## so recode 4 -> 2 before turning TypeStudy into a factor
df <- df %>%
  mutate(TypeStudy = as.factor(if_else(TypeStudy == 4, 2, TypeStudy)))
summary(df$TypeStudy)
## dummies for experiments (1) and natural experiments (2); studies with
## neither dummy set are the correlational ones
df <- df %>%
  mutate(
    Exp    = as.factor(ifelse(TypeStudy == 1, 1, 0)),
    NatExp = as.factor(ifelse(TypeStudy == 2, 1, 0))
  )

# probability samples: 0/1 factor that is 1 when GeneralPop equals 1
df <- df %>%
  mutate(ProbSample = as.factor(ifelse(GeneralPop == 1, 1, 0)))

# 9/11 dummy: 1 when the attack is the 2001 September 11 attacks, else 0.
# %in% returns FALSE for a missing ExactAttack_2, so those rows are coded 0.
df <- df %>%
  mutate(
    NineEleven = as.factor(
      ifelse(ExactAttack_2 %in% "2001 September 11 Attacks", 1, 0)
    )
  )
summary(df$NineEleven)

# outcome domains: collapse the PA_Category codes into three domains, then
# derive one 0/1 indicator per domain
df <- df %>%
  mutate(
    # codes not listed below give a missing Domain; the levels are given
    # explicitly to fix their order
    Domain = factor(
      case_when(
        PA_Category %in% c(1, 2, 3, 4) ~ "Rally effects",
        PA_Category %in% c(5, 6, 7, 8, 9, 11) ~ "Conservative shifts",
        PA_Category %in% c(10, 99) ~ "Outgroup hostility",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Conservative shifts",
        "Outgroup hostility",
        "Rally effects"
      )
    ),
    # dummy variables (missing when Domain is missing)
    Conservative = ifelse(Domain == "Conservative shifts", 1, 0),
    Outgroup     = ifelse(Domain == "Outgroup hostility", 1, 0),
    Rally        = ifelse(Domain == "Rally effects", 1, 0)
  )


#### 2.2. number of previous attacks ------------------------------------------

# import the fondapol attack data, harmonise it with the main dataset and
# drop the attacks that fall outside the regions/countries of interest
fondapol <- read_excel("data/attentats_islamistes_datafondapol_retained.xlsx") %>%
  mutate(
    YEAR = as.numeric(YEAR),
    REGION = as.character(REGION),
    # use the same country labels as df$Country
    COUNTRY = recode(
      as.character(COUNTRY),
      "United States" = "US",
      "United Kingdom" = "UK"
    )
  ) %>%
  filter(
    # exclude the listed regions ...
    !REGION %in% c(
      "East & Central Asia",
      "Middle East & North Africa",
      "South America",
      "South Asia",
      "Southeast Asia",
      "Sub-Saharan Africa"
    ),
    # ... and Russia
    !COUNTRY %in% c("Russia")
  )


# create variables: number of attacks in the fondapol data that happened
# before the study year, per country or for all retained rows ("West"), and
# within the previous five years or over all earlier years

# Count the fondapol rows with YEAR strictly before `study_year`.
#   study_year: a single year; NA gives NA_integer_.
#   country:    if given, count only rows whose COUNTRY equals it.
#   lookback:   if given, count only rows with YEAR >= study_year - lookback.
count_prior_attacks <- function(study_year, country = NULL, lookback = NULL) {
  if (is.na(study_year)) {
    return(NA_integer_)
  }
  hit <- fondapol$YEAR < study_year
  if (!is.null(lookback)) {
    hit <- hit & fondapol$YEAR >= (study_year - lookback)
  }
  if (!is.null(country)) {
    hit <- hit & fondapol$COUNTRY == country
  }
  sum(hit)
}

df <- df %>%
  # rowwise() so that StudyYear and Country are single values per call
  rowwise() %>%
  mutate(
    PastAttacks_Country_5yr = count_prior_attacks(
      StudyYear,
      country = Country,
      lookback = 5
    ),
    PastAttacks_West_5yr = count_prior_attacks(StudyYear, lookback = 5),
    PastAttacks_Country_All = count_prior_attacks(StudyYear, country = Country),
    PastAttacks_West_All = count_prior_attacks(StudyYear)
  ) %>%
  ungroup()



### 3. select necessary variables ---------------------------------------------

df <- df %>%

  # select relevant variables
  select(
    # ID variables
    Author, ajps, ID_R, ID_RS, ID_ES_Unique,
    # study features
    TypeReport, Year, TypeStudy, StudyYear, SampleSize,
    Exp, NatExp, StudentPop, ProbSample, Country, US,
    AgeMean, FemaleP, LiberalP,
    # attack features
    IV_Exact, Attack, ExactAttack_2,
    NineEleven,
    Casualties, CasualtiesCat,
    CasualtiesLess10, Casualties10to100, Casualties100plus, CasualtiesNotStated,
    # previous number of attacks
    PastAttacks_Country_5yr, PastAttacks_West_5yr,
    PastAttacks_Country_All, PastAttacks_West_All,
    # outcome features
    DV_Exact, DV_Code, PA_Category, Domain,
    Conservative, Outgroup, Rally,
    # effect sizes
    Regression, d, var_d, pval_d, ess
  ) %>%

  # measurement levels
  mutate(
    ID_R = as.character(ID_R),
    StudyYear = as.numeric(StudyYear),
    # NOTE(review): SampleSize is overwritten with the values of `ess` here,
    # not converted from SampleSize itself -- confirm this is intended.
    SampleSize = as.numeric(ess),
    d = as.numeric(d),
    # standard error of d; taken from the piped data (not the global `df`)
    # so it always matches the rows being mutated
    se_d = sqrt(var_d)
  )


# write the cleaned data frame to disk as an RDS file
saveRDS(object = df, file = "data/df.rds")